#note: data and graphs appear roughly in the order they appear in the paper.
#some code is redundant and may involve more steps than stricly necessary, 
    #but this is done for for the sake of clarity.

library(foreign)#allow for import of Stata datasets
#note: you do not need to change working directory if data is already stored in R working directory
#if not, change the working directory to where the files are stored using the command below:
#setwd("C:\\Documents and Settings\\....")

#get 2006 Data
#use 2004 vars from 2006 data to account for GA switch of 3rd and 8th districts
    #for s-v analyis
house.2006 <- read.dta("2006_house_data.dta")
attach.all(house.2006)
i2006 <- house.2006[,"incumb06"]#incumbency in 2006
unc2006 <- house.2006[,"uncontested06"]#uncontested in 2006
dvote.2006 <- house.2006$dvoteimputed#imputed vote in 2006
adv.2006 <- mean(dvote.2006) #mean avg. district vote (imputed)
winner.2006 <- house.2006$winner#winner of each race (1 for Dems, 0 for GOP
dem.seats.2006 <- mean(winner.2006) #percent of Dem seats (233/435); counting FL-13 for GOP
v2004 <- dlag06imputed #use imputed lagged vote to get 2004 vote for 2006 s-v prediction
i2004 <- incumb04 #incumbency in 04 for s-v prediction


#get data from all years for time-series plots
#need average district vote and dem per seats

#first get dem seats for 1946--2006; unit of observation here is the election year (i.e. "aggregate")
agg.house.data <- read.dta("House_1946-2006_aggregate.dta")
attach.all(agg.house.data)
unique.year.all <- unique(year)#create variable for each uniqye year
dem.seats <- dem.seats.per#Dem percentage of seats in House stemming from each election
#get total vote for 1946-2004
dem.per.total <- dem.per.total#dems percentage of total vote in each election
#get total vote for 2006
attach.all(house.2006)
dem.total.2006 <- sum(dem.total)/(sum(dem.total)+sum(gop.total))#total vote in 2006
#add on 2006 to total
dem.per.total[31] <- dem.total.2006#add 2006 to total vote vector (since it's not part of main dataset yet

#get average district vote for all years
###########################################
detach()
house.4604 <- read.dta("House_1946_2004_updated.dta") #1946-2004 data -- statecd is unit
attach.all(house.4604)
house.4604 <- house.4604[winner < 9,]#drop handful of 3rd party candidates
                        #not-including Bernie Sanders, who we count as a Democrat
detach()
attach.all(house.4604)
unique.year.4604 <- unique(year)#create year var for 1946-2004 (helps avoid confusion when looping)
adv <- rep(NA, length(unique.year.4604))#create empty vector for imputed avg. district vote, 1946-2004
for (i in 1:length(unique.year.4604)){
    adv[i] <- mean(dvoteimputed[year==unique.year.4604[i]])#get adv for each year, 1946-2004
}
adv[31] <- adv.2006#add in 2006 (since it's not part of main dataset yet)

########################################################################

#FIGURE 1: Plot avg. dist. vote versus percent of seats time series for 1946-2000
year.at.vec <- c((seq(1946, 2006, by = 2)))#use to plot tick marks on x-axis
year.label.vec <- c("", "", "1950", "","","", "", "1960", "","","", "", "1970",
    "","","", "", "1980", "","","", "", "1990", "","","", "", "2000","", "", "2006" )#x-axis labels
pdf("avd_vs_seats_allyears.pdf", height = 9, width = 21)
par(mfrow=c(1,1), mar = c(7,7.1,2,.5))#set up margins for plot
plot(unique.year.all, adv, axes = F, type = "n", xlab = "", 
    ylab = "", xlim = c(1946, 2010), ylim = c(.43,.68), xaxs="i", yaxs="i",
    main = "")#call empty plot so can add shading first, or else shading would block out lines
#add shading for different periods of party control GOP: 1946-1948, 1952-54, 1994-2006
shade.color = "gray92"
polygon(x=c(1946, 1946, 1948, 1948),
    y=par()$usr[c(3,4,4,3)],
    col= shade.color,   ## desired color
    border=F)         ## no border
polygon(x=c(1952, 1952, 1954, 1954),
    y=par()$usr[c(3,4,4,3)],
    col= shade.color,   ## desired color
    border=F)         ## no border
polygon(x=c(1994, 1994, 2006, 2006),
    y=par()$usr[c(3,4,4,3)],
    col= shade.color,   ## desired color
    border=F)         ## no border
points(unique.year.all, adv, type = "l")#points/line for avg. district vote
points(unique.year.all, dem.seats, type = "l", lty = 2)##points/line for dem percent of seats
axis(1,  at = year.at.vec, labels = year.label.vec, las = 1, cex.axis = 1.8, mgp = c(2,1.5,0))
axis(2, las = 1, at = c(seq(.4, .70, by = .05)), 
    label = c("40", "45%", "50%", "55%", "60%", "65%", "70%"), cex.axis = 1.8,mgp = c(2,1.2,0))
segments(1946, .5, 2006, .5,  col="gray", lwd=.5)#light line for 50%
text(1975, .52, "Average district vote\nfor Democrats", cex = 1.7)#labels for each line
segments(1973, .534, 1972.6, .547)
text(1982, .66, "Democrats' percentage\n of House seats", cex = 1.7)#labels for each line
segments(1978, .66, 1977.6, .65)
dev.off()

###################################################################


#FIGURE 2: historical seats-votes curves
    #we are uses 1958-2004 (non 02 years) to validate.
    #however, we use regressions from 1946-2004 to get coefficients to set parameter estimates below
#first, use 1946-2004

attach.all(house.4604)
house.4604no02 <- house.4604[year !=1952 & year != 1962 & year != 1972 
    & year !=1982 & year != 1992 & year != 2002,]
attach.all(house.4604no02)
unique.year.4604no02 <- unique(year)
coefs <- array(NA, c(length(unique.year.4604no02 ), 4)) #create empty matrix to store coefficients
resid.errors <- rep(NA, length(unique.year.4604no02 )) #empty vector to store residual errors
for (i in 1:length(unique.year.4604no02 )){#loop over each 
    fit <- lm(dvoteimputed ~ dlagimputed + incumb +  
        partycontrol, subset = year == unique.year.4604no02[i])
    coefs[i,] <- coef(fit)
    resid.errors[i] <- sigma.hat(fit)
        }

#now use only 1958 on, non redistricting years (02)
attach.all(house.4604)
house.data.replicate <- house.4604[year > 1956 & year !=1952 & year != 1962 & year != 1972 
    & year !=1982 & year != 1992 & year != 2002,]
detach()
attach.all(house.data.replicate)
unique.year.rep <- unique(year)
keep <- ifelse(unique.year.all > 1956 & unique.year.all !=1952 & #use indicator to pull out adv and seats 
    unique.year.all != 1962 & unique.year.all != 1972            #for validation years only
    & unique.year.all !=1982 & unique.year.all != 1992 
    & unique.year.all != 2002 & unique.year.all != 2006, 1, 0)
adv.rep <- adv[keep==1]
dem.seats.rep <- dem.seats[keep==1]

#set up vectors and parameter estimates for validation loop
n.sims <- 1000 #number of simulations
inc.impute <- .75 #for uncontested races
sbar.rep <- rep (NA, n.sims)#vector for predicted seats -- redrawn for each year.
axis.size <- 1 #control size of axis labels
predicted.seats <- rep(NA, length(unique.year.rep))#predicted number of seats, based on actual adv
predictive.error <- rep(NA,  length(unique.year.rep))#predictive error of model
predictive.sd <- rep(NA, length(unique.year.rep)) #standard deviation of seats at actual adv
partisan.bias <- rep(NA, length(unique.year.rep))#partisan bias in each year (not used in paper)
################################################################################
#note: this loop will take a long time to run with 1,000 sims 
    #(could lower to 100 with no substantive interpretative loss)
#create Figure 2
pdf("Seats_Votes_Curves_Over_Time_all_years.pdf", height = 8, width = 10)
#use layout command to set up different size panels, so that we can plot y-axis labels in 1st column
    #and x-axis labels on bottom row (i.e. not separate labels for each year.
    #For more on layout command see Murrell's "R Graphics" book
left.panel.width <- .8 # control width of left-hand panel (see "widths" in layout command)
layout(rbind(c(20, 1, 2,3,4,5), c(21, 6, 7,8,9,10), #order so that each year first, then y-axis labels, then x-axis lables
    c(22,11, 12, 13, 14, 15), c(23, 16, 17, 18, 19, 28),
    c(30, 24, 25, 26, 27, 29)),                          # then column labels, then row labels
     widths = cbind(c(left.panel.width,2,2,2,2,2), c(left.panel.width,2,2,2,2,2),c(left.panel.width,2,2,2,2,2),
        c(left.panel.width,2,2,2,2,2),c(left.panel.width,2,2,2,2,2)),
    heights = c(1,1,1,1,.6))#make last row smaller since it's just a label, no graph
#layout.show(30)#this shows how layout looks in R Window (but must be commented out when creating PDF)
for (i in 1:length(unique.year.rep)){#loop over validation years
rho <-  mean(coefs[i:(i+4),2])#get rho by taking mean coef from 5 years leading up to election year
sigma <- mean(resid.errors[i:(1+4)])#get mean residual standard error from past 5 years
phi <- unique(incumb.effect[year == unique.year.rep[i]]) #get estimated incumbency advantage for particular election year
v.lag <- dlagimputed[year==unique.year.rep[i]]#imputed vote
inc.lag <- incumb.lag[year==unique.year.rep[i]]#lagged incumbency
inc <-  incumb[year==unique.year.rep[i]]#incumbency status
unc <- uncontested[year==unique.year.rep[i]]#uncontested status
vbar.rep.lag <- mean(v.lag)#mean of vote from last election
vbar.rep.range <- round(vbar.rep.lag,2) + seq(-.1,.1,.0015) #create range from .45 to 55, at every .015 of seats
sbar.rep.expected <- rep (NA, length(vbar.rep.range))#set up vector for predicted seats over each interval in range
sbar.rep.sd <- rep (NA, length(vbar.rep.range))#set up vector for standard deviation of predicted seats
for (j in 1:length(vbar.rep.range)){#loop over intervals of vbar.rep
 vbar.rep <- vbar.rep.range[j]
 for (s in 1:n.sims){#loop ove simulations
   v.adj.lag <- v.lag - phi*inc.lag #adjusted vote, taking out incumbency
   normvote <- .5 + rho*(v.adj.lag - .5) #normal vote
   locfree <- normvote + phi*inc #location free: normal vote plus adjusted vote
   locfreenoisy <- rnorm(length(locfree), locfree, sigma) #add noise to loc.free var
   withuncs <- ifelse(unc==-1, 1-inc.impute,#add in uncontesteds (.25 and .75)
                           ifelse (unc==1, inc.impute, locfreenoisy))
   swingfree <- withuncs + mean(v.lag) - mean(withuncs) #take out swing
   v.predict <- swingfree + vbar.rep - mean(swingfree) #get predicted vote
   sbar.rep[s] <- mean(v.predict>.5)#predicted seats = percent of sims where predicted vote > .5
 }

sbar.rep.expected[j] <- mean(sbar.rep) #for each interval, median predicted seats across simulations
sbar.rep.sd[j] <- sd(sbar.rep) #for each interval, get standard deviation of seats given votes across sims
}

predicted.seats[i]<- mean(sbar.rep.expected[round(vbar.rep.range,2)==round(adv.rep[i],2)])#use mean b/c it's possible for multiple values 
                                                                                           #of vbar.rep.range to equal adv in a given year
predictive.error[i] <- dem.seats.rep[i] - predicted.seats[i]
partisan.bias[i] <- mean(2*(sbar.rep.expected[round(vbar.rep.range,2)==.50]-.5))
predictive.sd[i] <- mean(sbar.rep.sd[round(vbar.rep.range,2)==round(adv.rep[i],2)])

#first: within loop, plot each year
par(mar=c(2,2,2,2), pty ="s")#set margins for each panel; pty = s makes plots square
plot(adv.rep[i],dem.seats.rep[i],#plot adv vs. predicted seats
    cex = 1.1,  pch = 19, type="p", xlim = c(.4,.7), ylim=c(.4,.7),  
     main = unique.year.rep[i], xlab = "", 
    ylab = "", mgp = c(2,.5,0), axes = F, xaxs = "i", yaxs="i")#turn off axes so can use "axis" command for fine control
points(vbar.rep.range, sbar.rep.expected, type="l")#plot seats-votes
#lines(lowess(vbar.rep.range, sbar.rep.expected))#if you want to plot lowess 
axis(1, at = c(.4,.5,.6,.7), label = c("40%","50%","60%","70%"),  mgp = c(2,.5,0), cex.axis=axis.size)
axis(2,las = 1,  at = c(.4,.5,.6,.7), label = c("40%","50%","60%","70%"), mgp = c(2,.5,0), cex.axis=axis.size)
lines (c(0,1),c(.5,.5), col="gray", lwd=.5)#add light lines at .5
lines (c(.5,.5),c(0,1), col="gray", lwd=.5) 
box()
}

#outside loop:  add labels
#first: y-axis (4 rows)
for (i in 1:4){
par(mar=c(0,0,0,0))
plot(.5,.5, xlim = c(.4, .7), ylim = c(.4, .7), type = "n", axes = F, xlab = "", ylab = "")#call empty plot to keep on same scale
text(.55, .55, "Democratic Share\nof House Seats", cex = 1.2, srt = 90, xpd = NA)
}
#second: x-axis (5 columns)
for (i in 1:5){
par(mar=c(0,0,0,0))
plot(.5,.5, xlim = c(.4, .7), ylim = c(.4, .7), type = "n", axes = F, xlab = "", ylab = "")#call empty plot to keep on same scale
text(.55, .65, "Avg. District Vote\nfor Democrats", cex = 1.2, xpd = NA)
}
dev.off()

rmse.predict <- sqrt(sum(predictive.error^2)/length(unique.year.rep))#Get RMSE for prediction; 
                                                                     #this is used for generating probabilities for 2006 and 2008
rmse.sd <- sqrt(sum(predictive.sd^2)/length(unique.year.rep))#RMSE of standard deviation of seats

################################################################################

#plot 2006 predicted seats-votes curve, plus actual election results
    #note: if you haven't run loop above, set rmse.predict = to .0222
#paramater estimates for 2006
phi <- .08
rho <- .71
sigma <- .066 
inc.impute <- .75 #for uncontested races

n.sims <- 1000
vbar.2004 <- mean(v2004)
vbar.range <- round(vbar.2004,2) + seq(-.1,.1,.002) #create range from 45 to 55
sbar.50 <- rep (NA, length(vbar.range))#vector for predicted seats (based on medians)
prob <- rep (NA, length(vbar.range)) #set up vector for prob. of winning house
for (j in 1:length(vbar.range)){#loop over intervals of vbar
 vbar <- vbar.range[j]
 sbar <- rep (NA, n.sims) 
 for (i in 1:n.sims){
   v.adj2004 <- v2004 - phi*i2004 #adjusted vote, taking out incumbency
   normvote2006 <- .5 + rho*(v.adj2004 - .5) #normal vote
   locfree2006 <- normvote2006 + phi*i2006 #location free: normal vote plus adjusted vote
   locfreenoisy2006 <- rnorm(length(locfree2006), locfree2006, sigma) #add noise to loc.free var
   withuncs2006 <- ifelse (unc2006==-1, 1-inc.impute,
                           ifelse (unc2006==1, inc.impute, locfreenoisy2006))
   swingfree2006 <- withuncs2006 + mean(v2004) - mean(withuncs2006) #take out swing
   v2006 <- swingfree2006 + vbar - mean(swingfree2006) 
   #V2006 <- withuncs2006 + vbar + mean(v2004) - mean(withuncs2006) - mean(swingfree2006) 
   sbar[i] <- mean(v2006>.5)
 }
 sbar.50[j] <- mean(sbar)
 prob[j] <- pnorm((sbar.50[j] - 0.5)/rmse.predict)#use empirical predictive error for historical regressions (above)
}
#get v.bars when prob. == 10, 50, 90%
ten.percent.value <- mean(vbar.range[round(prob,1) ==.10])
fifty.percent.value <- mean(vbar.range[round(prob,1) ==.50])
ninety.percent.value <- mean(vbar.range[round(prob,1) ==.90])
print(ten.percent.value)
print(fifty.percent.value)
print(ninety.percent.value)

#get predicted seats and probability based on adv. for 2006
pred.seats.2006 <- mean(sbar.50[vbar.range==round(adv.2006,3)])
seats.error.2006 <- dem.seats.2006 - pred.seats.2006
bias.2006 <- mean(2*(sbar.50[round(vbar.range,2)==.50]-.5))
prob.2006 <- prob[vbar.range == round(adv.2006,3)]
#get probability dems would have won house if they got the same adv as GOP in 1994
prob.1994 <- prob[vbar.range == round(1-adv[unique.year.all==1994],2)]
#get number of seats GOP would have won with dems average district vote.
gop.seats.pred <- 1-mean(sbar.50[vbar.range==round(1-adv.2006,3)])
#gop.seats.pred*435 = 249

######################################################

#Predicted SV Curve
axis.size <- 1.1 #control size of axis tick marks and numbers
axis.label <- 1.1 #control size of axis labels
pdf("Seats_Votes_2006_prob_combined.pdf", height = 4, width = 8)
par (mfrow=c(1,2), mar = c(7,6,2,3), pty = "s")
plot(vbar.range, sbar.50, type = "l", 
    ylab = "",  xlab = "", axes = F, xaxs="i", yaxs="i",
    ylim = c(.4, .6), xlim = c(.40,.6),, 
    main = "")
#lines(lowess(vbar.range, sbar.50))
axis(1, at = c(.40,.45, .50,.55,.60), label = c("40%", "", "50%", "", "60%"), mgp = c(2,.5,0), cex.axis = axis.size)
axis(2, at = c(.40,.45, .50,.55,.60), label = c("40%", "", "50%", "", "60%"), las = 2, mgp = c(2,.5,0), cex.axis = axis.size)
mtext("Average district vote\nfor Democrats", 1, cex = axis.label, line = 3.1)
mtext("Democratic share\nof House seats", 2, cex = axis.label, line=3.1)
abline(h=.5,  col="gray", lwd=.5)
abline(v=.5,  col="gray", lwd=.5)
points(adv.2006, dem.seats.2006, type = "p", pch = 19, cex = .8)
box()

#pdf("Probability_graph_2006.pdf", height = 4, width = 6)
par (mar = c(7,6,2,3))
plot (vbar.range, prob, type="l", 
    main = "", 
    xlab ="", ylab ="", axes = F,  xaxs="i",
     yaxs="i", xlim = c(.4,.6), ylim = c(0,1))
axis(1, at = c(.40,.45, .50,.55,.60), label = c("40%", "", "50%", "", "60%"), mgp = c(2,.5,0), , cex.axis = axis.size)
axis(2, at = c(seq(0,1,by =.25)), label = c(0,".25",".5",".75",1), las = 2, mgp = c(2,.5,0), cex.axis = axis.size)
mtext("Average district vote\nfor Democrats", 1, cex = axis.label, line = 3.1)
mtext("Probability Democrats\nControl House", 2, cex = axis.label, line =2.7)
abline (h=.5,col="gray", lwd=.5)#50% line
abline (h=.1, col="gray", lwd=.5)#10 % line
abline (h=.9, col="gray", lwd=.5)#90% line
#add point for actual avd vote and predicted probabloty
points(adv.2006, prob.2006, type = "p", pch = 19, cex = .8)
box()
dev.off()
##########################################################

#presenting histograms from 1996-2006 election
    #use only uncontested races

attach.all(house.4604)
house.9604 <- house.4604[year>=1996,]
attach.all(house.9604)
year.9604 <- unique(year)
demwin <- na.omit(house.9604$dvote[house.9604$winner==1])#drop uncontesteds
gopwin <- 1- na.omit(house.9604$dvote[house.9604$winner==0])#drop uncontesteds; reverse to get gop share of vote
demwin.mean <- rep(NA, length(year.9604))#means for 1994-2004)
gopwin.mean <- rep(NA, length(year.9604))
demwin.2006 <- na.omit(house.2006$dvote[house.2006$winner==1]) #means for 2006
gopwin.2006 <- na.omit(1-house.2006$dvote[house.2006$winner==0])

#get means for each year, w/o uncontesteds 
for (i in 1:length(year.9604)){# dem means by year for winner
    keep <- year==year.9604[i] & winner == 1
    demwin.mean[i] <- mean(dvote[keep], na.rm=T)
    }
for (i in 1:length(year.9604)){#rep
    keep <- year==year.9604[i] & winner == 0
    gopwin.mean[i] <- 1 - mean(dvote[keep], na.rm=T)#
    }

#histograms, 1994--2006
pdf("hists9406.pdf", height = 4, width = 14)
#use this for 1996-2006
layout(rbind(c(13, 1, 2,3,4,5, 6), c(14, 7,8,9,10, 11, 12), #order so that dems first, then gop,
    c(21, 15, 16, 17, 18, 19, 20)),                          # then column labels, then row labels
    widths = cbind(c(1.3,2,2,2,2,2,2), c(1.3,2,2,2,2,2,2),c(1.3,2,2,2,2,2,2)),
    heights = c(2,2,1.5))
#layout.show(21)

par(mar = c(2, .5, 3.5,2))
#dems in top row, 1st: 1996-2004
for (i in 1:length(year.9604)){
    keep <- year==year.9604[i]
    demwin <- na.omit(house.9604$dvote[house.9604$winner==1 & keep])
    hist(demwin, axes = F, main = "",  xlab = "", ylab = "", xaxs = "i", yaxs="i",
    xlim = c(.5,1), nclass=n.bins(demwin)-1)
    abline(v=demwin.mean[i], lty = 2, lwd = 1.5)#draw line through mean
    axis(1, at = c(seq(.5, 1, by = .25)), label = c(50, 75, "100%"), 
        mgp = c(2,.7,0), cex.axis = axis.size)
    mtext(year.9604[i], 3, line = 1.5, cex = 1.5, font = 2)
    }
#add 2006
hist(demwin.2006, main = "", axes = F, xlim = c(.5,1), xaxs ="i", yaxs="i", 
    xlab ="", ylab ="", nclass=n.bins(demwin.2006))
  axis(1, at = c(seq(.5, 1, by = .25)), label = c(50, 75, "100%"), 
        mgp = c(2,.7,0), cex.axis = axis.size)
abline(v=mean(demwin.2006), lty = 2, lwd = 1.5)#draw line through mean
mtext(2006, 3, line = 1.5, cex = 1.5, font = 2)
#gop on bottom
par(mar = c(2, .5, 2, 2))
for (i in 1:length(year.9604)){
    keep <- year==year.9604[i]
    gopwin <- 1 - na.omit(house.9604$dvote[house.9604$winner==0 & keep])
    hist(gopwin, main = "", axes = F, xlab = "", ylab = "", xaxs = "i", yaxs="i",
        xlim = c(.5,1), nclass=n.bins(gopwin))
    abline(v=gopwin.mean[i], lty = 2, lwd = 1.5)#draw line through mean
    axis(1, at = c(seq(.5, 1, by = .25)), label = c(50, 75, "100%"), 
        mgp = c(2,.7,0), cex.axis = axis.size)
    }
#add 2006
hist(gopwin.2006, main = "", axes = F, xlim = c(.5,1), xaxs ="i", yaxs="i", 
    xlab ="", ylab ="", nclass=n.bins(gopwin.2006))
axis(1, at = c(seq(.5, 1, by = .25)), label = c(50, 75, "100%"), 
        mgp = c(2,.7,0), cex.axis = axis.size)
abline(v=mean(gopwin.2006), lty = 2, lwd = 1.5)#draw line through mean
    
#add column labels
par(mar = c(3, .5, 3, .5))
plot(0,0, xlim = c(.5, 1), ylim = c(0, 30), type = "n", axes = F)#call empty plot to keep on same scale
text(.8, 15, "Democratic\nwinners", cex = 1.8)
par(mar = c(3, .5, 3, .5))
plot(0,0, xlim = c(.5, 1), ylim = c(0, 30), type = "n", axes = F)#call empty plot to keep on same scale
text(.8, 15, "Republican\nwinners", cex = 1.8)
#add x-axis labels in each panel of bottom row
par(mar = c(2,2,2,2))
for (i in 1:6){
    plot(i,i, type = "n", axes = F)#call empty plot 
    mtext("Winner's share\nof 2-party vote", 3, line =-2.5, cex = 1.2)
    }

dev.off()
##############################################################
#for statistics cited within text of paper:
#get number of incumbents (Rep. v. Dem) who won with less than 58\% of vote
attach(house.2006)
#table(winner, incumb06)
#        incumb06
#winner  -1   0   1
#     0 189  13   0
#     1  22  20 191
gop.win.58 <- ifelse(winner==0 & dvoteimputed > .42 & incumb06 == -1, 1, 0)
#47 out of 189 GOP incumbents who won with less than 58% of vote == 24%
dem.win.58 <- ifelse(winner==1 & dvoteimputed < .58 & incumb06 == 1, 1, 0)
#7 out of 191 Dem incumbents won with less than 58%  == 4%

#get numbers of winners Rep. v. Dem) who won with less than 60% of vote
gop.win.60 <- ifelse(winner==0 & dvoteimputed > .4, 1, 0)
#78 GOP candidates won w/ less than 60% of vote
dem.win.60 <- ifelse(winner==1 & dvoteimputed < .6, 1, 0)
#40 Dem candidates won w/ less than 60% of vote
#total == 118 winners with less than 60%
#Thus, gop won 78/118 close races, or 66%


##################################################################
#PREDICTING THE 2008 SEATS VOTES CURVE
#reverse so that curve gives prob of Reps taking the House
#for incumbents: within each draw each has 90% chance of running
#paramater estimates for 2008 (same as 2006)
phi <- .08
rho <- .71
sigma <- .066 
inc.impute <- .75 #for uncontested races

attach.all(house.2006)
i2006 <- incumb06
i2008 <- ifelse(winner ==1, 1,-1)
n.sims <- 1000
vbar.2006 <- adv.2006#mean vote for 2006
v2006 <- dvote.2006 
vbar.2008.range <- round(vbar.2006,2) + seq(-.2,.2,.002) #create range from 45 to 55
sbar.2008.50 <- rep (NA, length(vbar.2008.range))
prob.2008 <- rep (NA, length(vbar.2008.range)) #set up vector for prob.2008. of winning house
for (j in 1:length(vbar.2008.range)){#loop over intervals of vbar.2008
 vbar.2008 <- vbar.2008.range[j]
 sbar.2008 <- rep (NA, n.sims) 
 for (i in 1:n.sims){
   i.random <- runif(435)
   i2008.draw <- ifelse(i.random < .90001, i2008, 0)
   v.adj2006 <- v2006 - phi*i2006 #adjusted vote, taking out incumbency
   normvote2008 <- .5 + rho*(v.adj2006 - .5) #normal vote
   locfree2008 <- normvote2008 + phi*i2008.draw #location free: normal vote plus adjusted vote
   locfreenoisy2008 <- rnorm(length(locfree2008), locfree2008, sigma) #add noise to loc.free var
   swingfree2008 <- locfreenoisy2008 + mean(v2006) - mean(locfreenoisy2008) #take out swing
   v2008 <- swingfree2008 + vbar.2008 - mean(swingfree2008) 
   sbar.2008[i] <- mean(v2008>.5)
 }
 sbar.2008.50[j] <- mean(sbar.2008)#mean
 prob.2008[j] <- pnorm((sbar.2008.50[j] - 0.5)/rmse.predict)
}
#get v.bars when prob.2008. == 10, 50, 90%
ten.percent.value.2008 <- mean(vbar.2008.range[round(prob.2008,1) ==.10])
fifty.percent.value.2008 <- mean(vbar.2008.range[round(prob.2008,1) ==.50])
ninety.percent.value.2008 <- mean(vbar.2008.range[round(prob.2008,1) ==.90])
ten.percent.value.2008
fifty.percent.value.2008
ninety.percent.value.2008

#Predicted SV Curve
axis.size <- 1.1 #control size of axis tick marks and numbers
axis.label <- 1.1 #control size of axis labels
pdf("Seats_Votes_2008_prob_combined.pdf", height =4, width = 8)
par (mfrow=c(1,2), mar = c(7,6,2,3), pty = "s")
plot(vbar.2008.range, sbar.2008.50, type = "l", 
    ylab = "",  xlab = "", axes = F, xaxs="i", yaxs="i",
    ylim = c(.4, .6), xlim = c(.40,.6),, 
    main = "")
#lines(lowess(1-vbar.2008.range, sbar.2008.50))
axis(1, at = c(.40,.45, .50,.55,.60), label = c("40%", "", "50%", "", "60%"), mgp = c(2,.5,0), cex.axis = axis.size)
axis(2, at = c(.40,.45, .50,.55,.60), label = c("40%", "", "50%", "", "60%"), las = 2, mgp = c(2,.5,0), cex.axis = axis.size)
mtext("Average district vote\nfor Democrats", 1, cex = axis.label, line = 3.1)
mtext("Democratic share\nof House seats", 2, cex = axis.label, line=3.1)
abline(h=.5,  col="gray", lwd=.5)
abline(v=.5,  col="gray", lwd=.5)
box()

par (mar = c(7,6,2,3))
plot (vbar.2008.range, prob.2008, type="l", 
    main = "", 
    xlab ="", ylab ="", axes = F,  xaxs="i",
     yaxs="i", xlim = c(.4,.6), ylim = c(0,1))
axis(1, at = c(.40,.45, .50,.55,.60), label = c("40%", "", "50%", "", "60%"), mgp = c(2,.5,0), , cex.axis = axis.size)
axis(2, at = c(seq(0,1,by =.25)), label = c(0,".25",".5",".75",1), las = 2, mgp = c(2,.5,0), cex.axis = axis.size)
mtext("Average district vote\nfor Democrats", 1, cex = axis.label, line = 3.1)
mtext("Probability Democrats\nControl House", 2, cex = axis.label, line =2.7)
abline (h=.5,col="gray", lwd=.5)#50% line
abline (h=.1, col="gray", lwd=.5)#10 % line
abline (h=.9, col="gray", lwd=.5)#90% line
box()
dev.off()


#total vs. average, 1946-2006
year.at.vec <- c((seq(1946, 2006, by = 2)))
year.label.vec <- c("", "", "1950", "", "", "","", "1960", "","","", "", "1970",
    "","","", "", "1980", "","","", "", "1990", "","","", "", "2000","", "", "2006")

pdf("Total_vs_Average.pdf", height =5, width = 13)
par(mfrow = c(1,1), mar = c(6,6, 2,1))
plot(unique.year.all, adv, type = "n",
    ylim = c(.45, .6), main = "", xlab = "",ylab = "", axes = F, xaxs = "i", yaxs="i")
shade.color = "gray92"
polygon(x=c(1946, 1946, 1948, 1948),
    y=par()$usr[c(3,4,4,3)],
    col= shade.color,   ## desired color
    border=F)         ## no border
polygon(x=c(1952, 1952, 1954, 1954),
    y=par()$usr[c(3,4,4,3)],
    col= shade.color,   ## desired color
    border=F)         ## no border
polygon(x=c(1994, 1994, 2006, 2006),
    y=par()$usr[c(3,4,4,3)],
    col= shade.color,   ## desired color
    border=F)         ## no border
points(unique.year.all, adv, type = "l")
points(unique.year.all, dem.per.total, type = "l", lty =2)
axis(1, at =  year.at.vec, labels = year.label.vec, las = 1, cex.axis = 1.1, mgp = c(2,.5,0))
axis(2, las = 1, at = c(seq(.45, .60, by = .05)), 
    label = c("40%", "50%", "55%", "60%"), cex.axis = 1.2,mgp = c(2,.6,0))
mtext("Democratic share of\nthe two-party vote", 2, line  = 3, cex = 1.4)
text(1968.2, .592, "Average District Vote")
segments(1965, .585, 1964.5, .58)
text(1971, .51, "Total Vote")
segments(1969, .51, 1968.25,.51)
segments(1946, .5, 2006, .5,  col="gray", lwd=.5)#add light line for 50%
dev.off()
